# -*-Python-*-

# vanilla_transformer with factorized vocab embedding.

import mesh_tensorflow.transformer.vocab_embeddings

# Gin macro assignment: defines `dropout_rate` with value 0.0.
# NOTE(review): presumably consumed as %dropout_rate by bindings in a companion
# config that this file is layered on top of -- confirm, nothing here reads it.
dropout_rate = 0.0
# Turn off the Unitransformer `shared_embedding_and_softmax_weights` option.
# NOTE(review): presumably required because the factorized embedding selected
# below is not used in shared-weights mode -- confirm against the model code.
Unitransformer.shared_embedding_and_softmax_weights = False

# Select the vocab embedding implementation: bind the `cls` parameter of
# transformer.get_vocab_embedding_cls to the FactorizedVocabEmbedding
# configurable. The `@name` form (no trailing parentheses) passes a reference
# to the configurable itself rather than the result of calling it.
transformer.get_vocab_embedding_cls.cls = @vocab_embeddings.FactorizedVocabEmbedding
# Set FactorizedVocabEmbedding's `inner_dimension_size` parameter to 128.
# NOTE(review): presumably the size of the intermediate (bottleneck) dimension
# of the factorization -- confirm against vocab_embeddings.
vocab_embeddings.FactorizedVocabEmbedding.inner_dimension_size = 128
